import ma.const

import random
import os

# --- Generator settings -----------------------------------------------------

# Parent directory under which a per-job sub-directory is created.
main_dest_dir = "/state/partition1/datasets/"

# Job identifier: names the output sub-directory and is passed to the
# input-filename lookup for every generated file.
job_id = 9

# Files are generated for the numbers start_file .. no_of_files - 1.
start_file = 0
no_of_files = 1000

# Each file is filled until at least this many characters have been
# written (32 * 1024 * 1024).
file_size = 32 * 1024 ** 2

# Per-file word counter; the script resets it to zero before each file.
words_inserted = 0

# Weighted vocabulary as (word, weight) pairs.  make_list() repeats each word
# `weight` times and the generator picks uniformly from that list, so a word
# is drawn with probability weight / (sum of all weights).
keys = [
    ('ma.mf.ah', 90),
    ('pd.pc.pa', 2),
    ('qd.qc.qa', 1),
    ('rd.rc.ra', 1),
    ('ud.uc.ua', 1),
    ('vd.vc.va', 1),
    ('wd.wc.wa', 1),
    ('td.tc.ta', 1),
    ('sd.sc.sa', 1),
    ('garbage.com', 1),
]

# Running tally of how many times each word has been written.  Built from
# `keys` so the two can never drift apart: a word present in `keys` but
# missing here would raise KeyError when it is counted.
keys_count = {word: 0 for word, _weight in keys}


def make_list(weighted_keys=None):
    """Expand (word, weight) pairs into a flat list with repeated words.

    Each word appears `weight` times in the result, in the order the pairs
    are given, so a uniform random pick from the returned list selects
    words in proportion to their weights.

    Args:
        weighted_keys: Iterable of (word, weight) pairs.  When omitted (or
            None) the module-level `keys` list is used.

    Returns:
        A new list of words.  Pairs whose weight is zero or negative
        contribute nothing; an empty input yields an empty list.
    """
    if weighted_keys is None:
        weighted_keys = keys

    expanded = []
    for word, weight in weighted_keys:
        # [word] * weight is empty for weight <= 0, matching range(weight).
        expanded.extend([word] * weight)
    return expanded


if __name__ == '__main__':
    # Script entry point: for every file number in start_file..no_of_files-1,
    # write a file of space-separated words drawn at random from the weighted
    # pool until at least `file_size` characters have been written, counting
    # each word in `keys_count` along the way.

    # Build the weighted pool once; every file draws from the same pool.
    word_pool = make_list()
    dest_dir = os.path.join(main_dest_dir, str(job_id))

    # ensure the destination directory exists
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    separator = ' '
    for file_no in range(start_file, no_of_files):
        # Project helper; presumably returns the input file name for this
        # job / file number -- confirm against ma.const.
        filename = ma.const.JobsXmlData.get_str_data(ma.const.xml_map_input_filename, job_id, file_no)
        filepath = os.path.join(dest_dir, filename)

        print(filepath)
        random.seed()

        curr_file_size = 0
        words_inserted = 0
        # The context manager closes the file even if a write fails part-way.
        with open(filepath, 'w+') as fd:
            while curr_file_size < file_size:
                word = random.choice(word_pool)
                keys_count[word] += 1
                token = word + separator
                # Size is tracked in characters written, separator included.
                curr_file_size += len(token)
                words_inserted += 1
                # Re-seed the generator after every 100 words.
                if words_inserted % 100 == 0:
                    random.seed()
                fd.write(token)

        print('File output', filename, 'Word count', words_inserted)

    print("Count of keys:\n", keys_count)
        